#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache Licence
@file: shixin_company_list.py
@time: 2018/2/4 20:46
"""

import sys

sys.path.append('../..')

from common.mongo import MongDb
from logger import Logger

# Connection settings for the ``app_data`` MongoDB database.
# NOTE(review): credentials are hard-coded in source — consider loading them
# from configuration that is kept out of version control.
APP_DATA_CONF = dict(
    host='172.16.215.16',
    port=40042,
    db='app_data',
    username='work',
    password='haizhi',
)

# Logging module: one shared logger for this script.
log = Logger('ningbo_shixin_excavate.log').get_logger()

# Positional arguments handed to MongDb, in the order the call passes them.
_CONNECTION_FIELDS = ('host', 'port', 'db', 'username', 'password')

# Shared database handle, built from APP_DATA_CONF and wired to the logger.
app_data_db = MongDb(*(APP_DATA_CONF[field] for field in _CONNECTION_FIELDS), log=log)


# Keyword looked for in the ``court`` field. It is a unicode literal on
# purpose: under Python 2 a UTF-8 byte-string literal tested against a unicode
# value is implicitly decoded as ASCII and raises UnicodeDecodeError.
_COURT_KEYWORD = u'宁波'

# Names with this many characters or fewer are skipped.
# NOTE(review): presumably this drops personal names and keeps companies —
# confirm with the data owner.
_MAX_SKIPPED_NAME_LEN = 4

# Number of matched records buffered before each bulk insert.
_BATCH_SIZE = 100

# Progress is logged every time this many records have matched.
_PROGRESS_EVERY = 100


def _matched_name(item):
    """Return the stripped ``i_name`` of *item* if it passes the filter.

    A record passes when both ``i_name`` and ``court`` are strings, ``court``
    contains the keyword, and the stripped name is longer than
    ``_MAX_SKIPPED_NAME_LEN``. Returns ``None`` for every other record.
    """
    i_name = item.get('i_name')
    court = item.get('court')
    if not isinstance(i_name, basestring) or not isinstance(court, basestring):
        return None

    # ``bytes`` is an alias of ``str`` on Python 2. Decode explicitly so the
    # containment test below always compares unicode with unicode instead of
    # depending on the interpreter's default encoding.
    if isinstance(court, bytes):
        court = court.decode('utf-8', 'replace')

    if _COURT_KEYWORD not in court:
        return None

    i_name = i_name.strip()
    # An empty name has length 0, so this single check also rejects it.
    if len(i_name) <= _MAX_SKIPPED_NAME_LEN:
        return None

    return i_name


def main():
    """Copy matching records from ``shixin_info`` to the Ningbo table.

    Iterates over everything ``app_data_db.traverse_batch`` yields for the
    source table with an empty filter, keeps the records accepted by
    ``_matched_name``, and writes them to ``zhejiang_ningbo_shixin_info``
    through ``app_data_db.insert_batch_data`` in batches of ``_BATCH_SIZE``.
    Each matched name, periodic progress and the final totals are logged.
    """
    source_table = 'shixin_info'
    target_table = 'zhejiang_ningbo_shixin_info'
    count = 0  # records that passed the filter
    total = 0  # records read from the source table
    result_list = []

    for item in app_data_db.traverse_batch(source_table, {}):
        total += 1

        i_name = _matched_name(item)
        if i_name is None:
            continue

        log.info(i_name)
        count += 1
        result_list.append(item)

        if len(result_list) >= _BATCH_SIZE:
            app_data_db.insert_batch_data(target_table, result_list)
            # Start a fresh buffer rather than clearing the list that was
            # just handed to the database layer.
            result_list = []

        if count % _PROGRESS_EVERY == 0:
            log.info("当前进度: total = {} count = {}".format(total, count))

    # Flush whatever is left over after the last full batch.
    if result_list:
        app_data_db.insert_batch_data(target_table, result_list)

    log.info("查找完成: total = {} count = {}".format(total, count))


# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
